import pandas as pd
# Load the three CSV inputs (paths relative to the working directory).
city_data_df = pd.read_csv("city_data.csv")
city_list_df = pd.read_csv("city_list.csv")
global_data_df = pd.read_csv("global_data.csv")

# Number of non-null entries in each column of the city-level table.
print(city_data_df.count())
year 70792 city 70792 country 70792 avg_temp 68245 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the city-level table. This is a bare expression: the result is
# only rendered when it is the last statement of an interactive cell.
city_data_df.describe()
| year | avg_temp | |
|---|---|---|
| count | 70792.000000 | 68245.000000 |
| mean | 1905.643576 | 16.138845 |
| std | 67.187043 | 7.603714 |
| min | 1743.000000 | -14.350000 |
| 25% | 1858.000000 | 9.710000 |
| 50% | 1910.000000 | 16.080000 |
| 75% | 1962.000000 | 23.820000 |
| max | 2013.000000 | 30.730000 |
# Preview the first rows of the city-level table.
print(city_data_df.head())
year city country avg_temp 0 1849 Abidjan Côte D'Ivoire 25.58 1 1850 Abidjan Côte D'Ivoire 25.52 2 1851 Abidjan Côte D'Ivoire 25.67 3 1852 Abidjan Côte D'Ivoire NaN 4 1853 Abidjan Côte D'Ivoire NaN
# Preview the first rows of the city/country lookup table.
print(city_list_df.head())
city country 0 Abidjan Côte D'Ivoire 1 Abu Dhabi United Arab Emirates 2 Abuja Nigeria 3 Accra Ghana 4 Adana Turkey
# Preview the first rows of the global yearly temperature table.
print(global_data_df.head())
year avg_temp 0 1750 8.72 1 1751 7.98 2 1752 5.78 3 1753 8.39 4 1754 8.47
# Summary statistics for the numeric columns of the global table. This is a
# bare expression: the result is only rendered when it is the last statement
# of an interactive cell.
global_data_df.describe()
| year | avg_temp | |
|---|---|---|
| count | 266.000000 | 266.000000 |
| mean | 1882.500000 | 8.369474 |
| std | 76.931788 | 0.584747 |
| min | 1750.000000 | 5.780000 |
| 25% | 1816.250000 | 8.082500 |
| 50% | 1882.500000 | 8.375000 |
| 75% | 1948.750000 | 8.707500 |
| max | 2015.000000 | 9.830000 |
# Rows of the city list that exactly repeat an earlier row; the per-column
# count is zero when there are no duplicates.
listOFDuplicate = city_list_df.loc[city_list_df.duplicated(keep="first")]
print(listOFDuplicate.count())
city 0 country 0 dtype: int64
# Rows of the global table that exactly repeat an earlier row; the per-column
# count is zero when there are no duplicates.
listOFDuplicate = global_data_df.loc[global_data_df.duplicated(keep="first")]
print(listOFDuplicate.count())
year 0 avg_temp 0 dtype: int64
# Rows of the city-level table that exactly repeat an earlier row; the
# per-column count is zero when there are no duplicates.
listOFDuplicate = city_data_df.loc[city_data_df.duplicated(keep="first")]
print(listOFDuplicate.count())
year 0 city 0 country 0 avg_temp 0 dtype: int64
# City-level rows in which at least one column is missing. count() reports
# non-null entries, so a column showing 0 is the one that is missing in
# every selected row.
listOfNanRows = city_data_df.loc[city_data_df.isna().any(axis="columns")]
print(listOfNanRows.count())
#print(city_data_df[city_data_df.isnull()].count())
year 2547 city 2547 country 2547 avg_temp 0 dtype: int64
# Global rows in which at least one column is missing (an empty frame is
# printed when nothing is missing).
listOfNanRows = global_data_df.loc[global_data_df.isna().any(axis="columns")]
print(listOfNanRows)
Empty DataFrame Columns: [year, avg_temp] Index: []
# City-list rows in which at least one column is missing (an empty frame is
# printed when nothing is missing).
listOfNanRows = city_list_df.loc[city_list_df.isna().any(axis="columns")]
print(listOfNanRows)
Empty DataFrame Columns: [city, country] Index: []
#print(city_data_df[city_data_df['avg_temp'].isnull()].head())
# Sanity check: print whether the city-level table is empty.
print(city_data_df.empty)
False
# Sanity check: print whether the global table is empty.
print(global_data_df.empty)
False
# Sanity check: print whether the city list is empty.
print(city_list_df.empty)
False
# Restrict the city-level table to rows whose country is Turkey.
citiesTurkey = city_data_df.loc[city_data_df["country"].eq("Turkey")]
print(citiesTurkey)
year city country avg_temp 659 1791 Adana Turkey 21.62 660 1792 Adana Turkey 18.57 661 1793 Adana Turkey 18.44 662 1794 Adana Turkey 18.82 663 1795 Adana Turkey 18.68 ... ... ... ... ... 28160 2009 Izmir Turkey 18.41 28161 2010 Izmir Turkey 19.27 28162 2011 Izmir Turkey 17.60 28163 2012 Izmir Turkey 18.54 28164 2013 Izmir Turkey 19.21 [1285 rows x 4 columns]
# From the Turkish rows, keep only those recorded for the city of Ankara.
myCityAnkara = citiesTurkey.loc[citiesTurkey["city"].eq("Ankara")]
print(myCityAnkara)
year city country avg_temp 3321 1755 Ankara Turkey 9.62 3322 1756 Ankara Turkey 10.99 3323 1757 Ankara Turkey 10.82 3324 1758 Ankara Turkey 8.33 3325 1759 Ankara Turkey 9.93 ... ... ... ... ... 3575 2009 Ankara Turkey 11.63 3576 2010 Ankara Turkey 13.27 3577 2011 Ankara Turkey 10.39 3578 2012 Ankara Turkey 11.45 3579 2013 Ankara Turkey 13.27 [259 rows x 4 columns]
# Summary statistics for the numeric columns of the Ankara subset. This is a
# bare expression: the result is only rendered when it is the last statement
# of an interactive cell.
myCityAnkara.describe()
| year | avg_temp | |
|---|---|---|
| count | 259.000000 | 259.000000 |
| mean | 1884.000000 | 10.394431 |
| std | 74.911058 | 0.703879 |
| min | 1755.000000 | 8.330000 |
| 25% | 1819.500000 | 9.930000 |
| 50% | 1884.000000 | 10.390000 |
| 75% | 1948.500000 | 10.820000 |
| max | 2013.000000 | 13.270000 |
# myCityAnkara was produced by boolean filtering of another frame, so writing
# a column on it directly makes pandas emit SettingWithCopyWarning. Taking an
# explicit copy first makes the assignment below unambiguous.
myCityAnkara = myCityAnkara.copy()

# Rows with at least one missing value before imputation (kept for inspection).
ankaraistOfNanRows = myCityAnkara[myCityAnkara.isnull().any(axis=1)]

# Replace missing yearly averages with the mean of the available readings.
myCityAnkara["avg_temp"] = myCityAnkara["avg_temp"].fillna(
    myCityAnkara["avg_temp"].mean()
)

# Rows that still contain a missing value after imputation.
resultNA = myCityAnkara[myCityAnkara.isnull().any(axis=1)]

# Non-null entries per column; all columns should now report the same count.
print(myCityAnkara.count())
year 259 city 259 country 259 avg_temp 259 dtype: int64
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy """Entry point for launching an IPython kernel.
# Preview the Ankara rows and print the number of non-null temperature values.
print(myCityAnkara.head())
print(myCityAnkara["avg_temp"].count())
year city country avg_temp 3321 1755 Ankara Turkey 9.62 3322 1756 Ankara Turkey 10.99 3323 1757 Ankara Turkey 10.82 3324 1758 Ankara Turkey 8.33 3325 1759 Ankara Turkey 9.93 259
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pylab as plb
from matplotlib.ticker import FormatStrFormatter
# Close every figure that is still open.
plt.close("all")

# Highest and lowest yearly average temperature recorded for Ankara,
# printed one per line (maximum first).
maxAnkara = myCityAnkara.loc[:, "avg_temp"].max()
minAnkara = myCityAnkara.loc[:, "avg_temp"].min()
print(maxAnkara, minAnkara, sep="\n")
13.27 8.33
# Mean of Ankara's yearly average temperatures.
meanmyCityAnkara = myCityAnkara.loc[:, "avg_temp"].mean()
print(meanmyCityAnkara)
10.394430894308941
# Standard deviation (pandas default) of Ankara's yearly average temperatures.
standardmyCityAnkara = myCityAnkara.loc[:, "avg_temp"].std()
print(standardmyCityAnkara)
0.7038792574102576
#myCityAnkaraBarplot = pd.Series(myCityAnkara["avg_temp"], index=range(1755,2014))
# Build the plotting table for Ankara: one row per year holding the yearly
# reading, with the overall mean and standard deviation repeated as constant
# columns.
lengthOfMyCityAnkara = myCityAnkara["avg_temp"].count()
dAnkara = {
    # Years are read from the data rather than a hard-coded range, so the
    # column lengths agree whatever period the Ankara subset covers.
    "year": myCityAnkara["year"].values,
    "avg_temp": myCityAnkara["avg_temp"].values,
    "mean": [meanmyCityAnkara for _ in range(lengthOfMyCityAnkara)],
    "standard deviation": [
        standardmyCityAnkara for _ in range(lengthOfMyCityAnkara)
    ],
}
myCityAnkaraBarplot = pd.DataFrame(data=dAnkara)

fig = plt.figure(figsize=(16, 10), dpi=600)
# Plain NumPy arrays are handed to matplotlib: passing pandas Series makes
# older matplotlib versions emit a multi-dimensional-indexing FutureWarning.
plt.plot(
    myCityAnkaraBarplot["year"].values,
    myCityAnkaraBarplot["avg_temp"].values,
    color="navy",
    linewidth=2,
)
plt.plot(
    myCityAnkaraBarplot["year"].values,
    myCityAnkaraBarplot["mean"].values,
    color="darkorange",
    linewidth=3,
)
plt.title("Ankara temperature changes", fontsize=18)
# Legend labels follow the order of the two plot() calls above.
plt.legend(["Ankara", "Mean"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Temperature $C^\circ$', fontsize=16)
plt.show()
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/cbook/__init__.py:1402: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x[:, None] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:276: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x = x[:, np.newaxis] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:278: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. y = y[:, np.newaxis]
import numpy as np
# Number of yearly rows available for Ankara.
print(myCityAnkara.shape[0])

# 10-row window means: entry k is the mean of rows k .. k+9 (by position).
# The column is converted to a NumPy array once and sliced, replacing the
# per-element ``.iloc[j]['avg_temp']`` lookups and repeated ``np.append``
# copies of the hand-written nested loop.
# NOTE(review): only ``n - 10`` windows are produced, i.e. the last complete
# window is left out. The length is kept because later cells pair this array
# with a fixed ``range(1765, 2014)`` year axis.
ankaraTempValues = myCityAnkara["avg_temp"].values
resultMovingAvgwithTen = np.array([
    ankaraTempValues[start:start + 10].mean()
    for start in range(myCityAnkara.shape[0] - 10)
])
print(len(resultMovingAvgwithTen))

# First year of every tenth window. Only the "year" value is collected; the
# earlier version appended whole rows (city and country included) into this
# array. Stored as floats, matching the float array it used to start from.
yearsArr = myCityAnkara["year"].values[
    np.arange(0, myCityAnkara.shape[0] - 10, 10)
].astype(float)
259 249
# Wrap Ankara's moving averages in a single-column DataFrame and preview it.
columns = dict(avg_temp=resultMovingAvgwithTen)
resultsWithTenYears = pd.DataFrame(columns)
print(resultsWithTenYears.head())
avg_temp 0 10.054772 1 10.132215 2 10.072659 3 10.030102 4 10.236545
# Ankara's moving averages as a Series indexed by the years 1765..2013.
ankaraTenYearsMovingAvgPlot = pd.Series(
    resultMovingAvgwithTen, index=range(1765, 2014)
)

fig = plt.figure(figsize=(16, 10), dpi=600)
MovingAvgAnkara = {
    'year': range(1765, 2014),
    'avg_temp': resultsWithTenYears["avg_temp"].values,
}
# NOTE: this rebinds myCityAnkaraBarplot to the moving-average table.
myCityAnkaraBarplot = pd.DataFrame(data=MovingAvgAnkara)

# Plain NumPy arrays are handed to matplotlib: passing pandas Series makes
# older matplotlib versions emit a multi-dimensional-indexing FutureWarning.
plt.plot(
    myCityAnkaraBarplot["year"].values,
    myCityAnkaraBarplot["avg_temp"].values,
    color="navy",
    linewidth=2,
)
plt.title("Ankara temperature changes", fontsize=18)
# Only one line is drawn, so only one legend label is supplied (the previous
# list also named a "Mean" line that this figure does not contain).
plt.legend(["Ankara"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Moving Avg Temp $C^\circ$', fontsize=16)
plt.show()
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/cbook/__init__.py:1402: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x[:, None] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:276: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x = x[:, np.newaxis]
# Preview the first rows of the global yearly temperature table again before
# computing its statistics.
print(global_data_df.head())
year avg_temp 0 1750 8.72 1 1751 7.98 2 1752 5.78 3 1753 8.39 4 1754 8.47
# Headline statistics of the global yearly average temperature, each printed
# right after it is computed (same output order: max, min, mean, std).
maxGlobal = global_data_df.loc[:, "avg_temp"].max()
print(f"max {maxGlobal}")

minGlobal = global_data_df.loc[:, "avg_temp"].min()
print(f"min {minGlobal}")

meanOfGlobal = global_data_df.loc[:, "avg_temp"].mean()
print(f"meanOfGlobal: {meanOfGlobal}")

standarDeviationGlobal = global_data_df.loc[:, "avg_temp"].std()
print(f"standard deviation: {standarDeviationGlobal}")
max 9.83 min 5.78 meanOfGlobal: 8.369473684210526 standard deviation: 0.5847474097994193
# Figure for the raw (unsmoothed) global yearly temperatures and their mean.
fig = plt.figure(figsize=(16, 10), dpi=600)
ig = fig  # the handle used to be bound to the misspelled name "ig"; kept as an alias

lengthOfGlobal = global_data_df["avg_temp"].count()
print(lengthOfGlobal)

# NOTE: despite their names, MovingAvgGlobal and
# globalTenYearsMovingAveragePlot hold the raw yearly values here; the names
# are kept because later cells rebind them.
MovingAvgGlobal = {
    # Years are read from the data rather than a hard-coded range, so the
    # column lengths agree whatever period the table covers.
    'year': global_data_df["year"].values,
    'avg_temp': global_data_df["avg_temp"],
    "mean": [meanOfGlobal for _ in range(lengthOfGlobal)],
}
globalTenYearsMovingAveragePlot = pd.DataFrame(data=MovingAvgGlobal)

# Plain NumPy arrays are handed to matplotlib: passing pandas Series makes
# older matplotlib versions emit a multi-dimensional-indexing FutureWarning.
plt.plot(
    globalTenYearsMovingAveragePlot["year"].values,
    globalTenYearsMovingAveragePlot["avg_temp"].values,
    color="darkred",
    linewidth=2,
)
plt.plot(
    globalTenYearsMovingAveragePlot["year"].values,
    globalTenYearsMovingAveragePlot["mean"].values,
    color="darkorange",
    linewidth=3,
)
plt.title("Global temperature changes", fontsize=18)
# Legend labels follow the order of the two plot() calls above.
plt.legend(["Global", "Mean"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# The plotted values are the raw yearly averages, not a moving average, so
# the axis label says so. Raw string: "\c" is not a valid escape sequence.
plt.ylabel(r'Temperature $C^\circ$', fontsize=16)
plt.show()
266
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/cbook/__init__.py:1402: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x[:, None] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:276: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x = x[:, np.newaxis] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:278: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. y = y[:, np.newaxis]
# Number of yearly rows in the global table.
print(global_data_df.shape[0])

# 10-row window means: entry k is the mean of rows k .. k+9 (by position).
# The column is converted to a NumPy array once and sliced, replacing the
# per-element ``.iloc[j]['avg_temp']`` lookups and repeated ``np.append``
# copies of the hand-written nested loop.
# NOTE(review): only ``n - 10`` windows are produced, i.e. the last complete
# window is left out. The length is kept because a later cell pairs this
# array with a fixed ``range(1760, 2016)`` year axis.
globalTempValues = global_data_df["avg_temp"].values
resultMovingAvgwithTenGlobal = np.array([
    globalTempValues[start:start + 10].mean()
    for start in range(global_data_df.shape[0] - 10)
])
print(len(resultMovingAvgwithTenGlobal))

# First year of every tenth window. Only the "year" value is collected; the
# earlier version appended whole rows (year and temperature interleaved)
# into this array. Stored as floats, as before.
yearsArr = global_data_df["year"].values[
    np.arange(0, global_data_df.shape[0] - 10, 10)
].astype(float)
266 256
#globalTenYearsMovingAvgPlot = pd.Series(resultMovingAvgwithTenGlobal, index=range(1760,2016))
"""
gl = globalTenYearsMovingAvgPlot.plot(colormap='jet', title='temperature changes in global with moving average (1750-2015)')
gl.set_xlabel("Years")
gl.set_ylabel("Moving Averages with 10 Years")
"""
# Plot the global 10-year moving averages against the years 1760..2015.
fig = plt.figure(figsize=(16, 10), dpi=600)
MovingAvgGlobal = {
    'year': range(1760, 2016),
    'avg_temp': resultMovingAvgwithTenGlobal,
}
globalTenYearsMovingAveragePlot = pd.DataFrame(data=MovingAvgGlobal)

# Plain NumPy arrays are handed to matplotlib: passing pandas Series makes
# older matplotlib versions emit a multi-dimensional-indexing FutureWarning.
plt.plot(
    globalTenYearsMovingAveragePlot["year"].values,
    globalTenYearsMovingAveragePlot["avg_temp"].values,
    color="darkred",
    linewidth=2,
)
plt.title("Global temperature changes", fontsize=18)
plt.legend(["Global"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Moving average temperature $C^\circ$', fontsize=16)
plt.show()
# Restrict the global table to the years 1755..2013 (both ends included) so
# it can be compared with the Ankara readings.
newDataSimilartoAnakarYear = global_data_df.loc[
    global_data_df["year"].between(1755, 2013)
]
print(newDataSimilartoAnakarYear.head())
year avg_temp 5 1755 8.36 6 1756 8.85 7 1757 9.02 8 1758 6.74 9 1759 7.99
# Number of yearly rows in the year-restricted global table.
print(newDataSimilartoAnakarYear.shape[0])

# 10-row window means: entry k is the mean of rows k .. k+9 (by position).
# The column is converted to a NumPy array once and sliced, replacing the
# per-element ``.iloc[j]['avg_temp']`` lookups and repeated ``np.append``
# copies of the hand-written nested loop.
# NOTE(review): only ``n - 10`` windows are produced, i.e. the last complete
# window is left out. The length is kept because a later cell pairs this
# array with a fixed ``range(1765, 2014)`` year axis.
alignedGlobalTempValues = newDataSimilartoAnakarYear["avg_temp"].values
resultMovingAvgwithTenGlobal1 = np.array([
    alignedGlobalTempValues[start:start + 10].mean()
    for start in range(newDataSimilartoAnakarYear.shape[0] - 10)
])
print(len(resultMovingAvgwithTenGlobal1))

# First year of every tenth window, read from the year-restricted frame.
# The earlier version indexed the unrestricted global_data_df here (a
# copy-paste slip that picked rows from a different period) and appended
# whole rows rather than the year. Stored as floats, as before.
yearsArr = newDataSimilartoAnakarYear["year"].values[
    np.arange(0, newDataSimilartoAnakarYear.shape[0] - 10, 10)
].astype(float)
259 249
"""
globalTenYearsMovingAvgPlot1 = pd.Series(resultMovingAvgwithTenGlobal1, index=range(1765,2014))
gl1 = globalTenYearsMovingAvgPlot1.plot(colormap='jet', title='temperature changes in global with moving average (1755-2013)')
gl1.set_xlabel("Years")
gl1.set_ylabel("Moving Averages with 10 Years")
"""
# Overlay Ankara's and the global 10-year moving averages for 1765..2013.
fig = plt.figure(figsize=(16, 10), dpi=600)
MovingAvgGlobal = {
    'year': range(1765, 2014),
    'avg_temp': resultMovingAvgwithTenGlobal1,
}
MovingAvgAnkara = {
    'year': range(1765, 2014),
    'avg_temp': resultsWithTenYears["avg_temp"].values,
}

# Plain NumPy arrays are handed to matplotlib: passing pandas Series makes
# older matplotlib versions emit a multi-dimensional-indexing FutureWarning.
myCityAnkaraBarplot = pd.DataFrame(data=MovingAvgAnkara)
plt.plot(
    myCityAnkaraBarplot["year"].values,
    myCityAnkaraBarplot["avg_temp"].values,
    color="navy",
    linewidth=2,
)
globalTenYearsMovingAveragePlot = pd.DataFrame(data=MovingAvgGlobal)
plt.plot(
    globalTenYearsMovingAveragePlot["year"].values,
    globalTenYearsMovingAveragePlot["avg_temp"].values,
    color="darkred",
    linewidth=2,
)
plt.title("Ankara vs Global temperature changes", fontsize=18)
# Legend labels follow the order of the two plot() calls above.
plt.legend(["Ankara", "Global"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Moving average temperature $C^\circ$', fontsize=16)
plt.show()
/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/cbook/__init__.py:1402: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x[:, None] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:276: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. x = x[:, np.newaxis] /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py:278: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. y = y[:, np.newaxis]
# Side-by-side table of the two moving-average series, aligned on the row
# index of the source frames; the trailing bare expression previews it.
globalvsAnkara = pd.DataFrame(
    {
        'Ankara AVG_temp': myCityAnkaraBarplot["avg_temp"],
        'Global AVG_temp': globalTenYearsMovingAveragePlot["avg_temp"],
    },
    columns=['Ankara AVG_temp', 'Global AVG_temp'],
)
globalvsAnkara.head()
| Ankara AVG_temp | Global AVG_temp | |
|---|---|---|
| 0 | 10.054772 | 8.143 |
| 1 | 10.132215 | 8.132 |
| 2 | 10.072659 | 8.088 |
| 3 | 10.030102 | 8.008 |
| 4 | 10.236545 | 8.012 |
# Moving-average series to compare, as plain Python lists.
ank = globalvsAnkara['Ankara AVG_temp'].tolist()
glb = globalvsAnkara['Global AVG_temp'].tolist()
# Calendar years for the x-axis. The comparison table is indexed 0..N-1, so
# plotting against its index put row numbers on an axis labelled "Years".
# The years are taken from the frame the Ankara column was copied from.
# NOTE(review): assumes myCityAnkaraBarplot still holds the moving-average
# table (with its "year" column) built for the comparison plot.
year = myCityAnkaraBarplot["year"].tolist()

fig = plt.figure(figsize=(16, 10), dpi=600)
plt.plot(year, ank, marker=".", color="navy", linewidth=3)
plt.plot(year, glb, marker=".", color="darkred", linewidth=3)
# alpha must be a number; the string "0.4" is rejected by matplotlib.
plt.fill_between(year, ank, glb, color="orange", alpha=0.4)
plt.title("Ankara vs Global temperature changes", fontsize=18)
plt.legend(["Ankara", "Global"], loc="upper left", fontsize=14)
plt.xlabel('Years', fontsize=16)
# Raw string: "\c" is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Moving average temperature $C^\circ$', fontsize=16)
plt.show()